# Importing necessary libraries for data analysis and visualization
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import matplotlib.image as mpimg # Importing matplotlib image for image plotting
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# Importing libraries for spatial data and visualization
import geopandas as gpd
import folium
from folium import Figure
import contextily as cx
import libpysal
from libpysal import weights
from libpysal.weights import Queen
# Exploratory Spatial Data Analysis (ESDA) tools
import mapclassify as mc
import esda
from esda.moran import Moran, Moran_Local
# Spatial plotting tools
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from splot.mapping import vba_choropleth
# Statistical modeling
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
warnings.filterwarnings('ignore')
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)This notebook examines spatial dependence in nighttime luminosity across 520 Indian districts using Local Indicators of Spatial Association (LISA). We apply Local Moran’s I to both the initial level and the growth rate of luminosity per capita, and visualize the results as cluster maps identifying statistically significant spatial clusters (HH, LL) and outliers (HL, LH).
Setup
In [1]:
Data
We load district geometries and nighttime lights variables from the GeoPackage, which already contains the 520-district sample merged with all analysis variables.
In [2]:
# Load data from GitHub
import urllib.request, tempfile, os
url = "https://raw.githubusercontent.com/quarcs-lab/project2025s/master/data/maps/india_2001_520.gpkg"
temp_gpkg = os.path.join(tempfile.gettempdir(), "india_2001_520.gpkg")
urllib.request.urlretrieve(url, temp_gpkg)
gdf = gpd.read_file(temp_gpkg, engine="pyogrio")In [3]:
# Rename key variables for convenience
gdf = gdf.rename(columns={
"newdata_light_growth96_10rcr_cap": "growth",
"newdata_log_light96_rcr_cap": "log_initial",
"NAME2_": "district",
})
# Convert from string to numeric (GeoPackage stores these as text)
gdf["growth"] = pd.to_numeric(gdf["growth"], errors="coerce")
gdf["log_initial"] = pd.to_numeric(gdf["log_initial"], errors="coerce")
print(f"Districts: {len(gdf)}")
gdf[["district", "growth", "log_initial"]].describe()Districts: 520
| growth | log_initial | |
|---|---|---|
| count | 520.000000 | 520.000000 |
| mean | 0.023523 | -4.818477 |
| std | 0.043073 | 1.093441 |
| min | -0.095267 | -8.468901 |
| 25% | -0.001208 | -5.488250 |
| 50% | 0.022962 | -4.549616 |
| 75% | 0.037141 | -3.952151 |
| max | 0.282837 | -2.182392 |
The interactive choropleth below maps per capita luminosity growth across all 520 districts. Hover over individual districts to inspect their values.
In [4]:
# Visualize spatial data using the explore() method of a GeoDataFrame
gdf.explore(
# Specify the column to visualize on the map
column='growth',
# Specify the attributes to display in the tooltip when hovering over map features
tooltip=['district', 'log_initial', 'growth'],
# Choose the classification scheme for data visualization
scheme='fisherjenks',
# Specify the number of classes for classification
k=5,
# Choose the colormap for data visualization
cmap='coolwarm',
# Specify whether to display a legend
legend=True,
# Choose the basemap tiles provider
tiles='CartoDB positron',
# Customize the style of the basemap tiles
style_kwds=dict(color="gray", weight=0.5),
# Customize the appearance of the legend
legend_kwds=dict(colorbar=False)
)Make this Notebook Trusted to load map: File -> Trust Notebook